##############################################################
### Replication Code for Dictionary-Based Content Analysis ###
### Title: Relative Gains in the Shadow of a Trade War     ###
### Authors: Eddy S. F. Yeung and Kai Quek                 ###
### Version: December 6, 2021                              ###
##############################################################

### Set-up ###
## Clean the R environment and set the working directory
# RStudio version: 1.3.959
# Remove every object listed by ls() so the script starts from an empty
# workspace.
rm(list = ls())
# The Excel input and the PDF figures below are addressed with relative
# paths, so they are read from / written to this directory.
setwd("~/Downloads/trade_replication") # change to your own working directory

## Load the required packages
library(quanteda)  # version 3.2.0
library(readxl)    # version 1.3.1
library(tidyverse) # version 1.3.1
library(extrafont) # version 0.17
# windowsFonts("Times" = windowsFont("Times")) # may be required for Windows users

## Read the workbook holding all open-ended responses and their codings
df <- read_excel("open-ended_coding.xlsx")

## Split out the four win-lose groups (one data frame per experimental group)
# exp_group code -> object: 1 = China/ctrl, 5 = China/treat,
#                           2 = Country X/ctrl, 7 = Country X/treat
df.China.ctrl   <- filter(df, exp_group == 1)
df.China.treat  <- filter(df, exp_group == 5)
df.CntryX.ctrl  <- filter(df, exp_group == 2)
df.CntryX.treat <- filter(df, exp_group == 7)

### Figure A11: Percentages of Other-Regarding and Self-Serving Responses in the Win-Lose Scenario ###
## Dictionary with two keys: "other.reg" (other-regarding vocabulary) and
## "self.serve" (self-serving vocabulary)
# NOTE(review): "unscrupl*" cannot match "unscrupulous" (that word continues
# "unscrupu..."). Left unchanged so the published counts are reproduced;
# confirm whether "unscrupul*" was intended.
myDict <- list(
  other.reg = c(
    "harm*", "hurt*", "selfish", "equal*", "other*", "human*",
    "inhuman*", "wrong", "incorrect", "suffer*", "equit*",
    "unfair*", "expense*", "unethic*", "inapprop*", "moral*",
    "unscrupl*", "miser*", "harsh", "unnecessar*"
  ),
  self.serve = c(
    "job*", "help*", "econom*", "protect", "first",
    "priorit*", "regain", "own"
  )
) %>%
  dictionary()

## Dictionary coding of the open-ended responses (other-regarding / self-serving)
# flag_dictionary_hits(): code every response as 1/0 for each dictionary key.
#   responses  character vector, one open-ended answer per element
#   dict       quanteda dictionary; its keys become the output columns
# Returns a data frame with one row per response: `doc_id` plus one numeric
# column per key that is 1 when the response contains at least one matching
# term and 0 otherwise (simple presence coding, not a term count).
#
# The text is tokenized, lowercased and stemmed before the lookup. This is the
# same normalization as dfm(text, tolower = T, stem = T), but written with
# tokens() and dfm_wordstem() because calling dfm() on raw text and passing
# `stem` are deprecated in quanteda 3.x.
#
# NOTE(review): the lookup runs on stemmed features, so an entry that is longer
# than its own stem (e.g. "expense*" vs. the stem "expens") appears unable to
# match. Kept as-is to reproduce the published figures -- confirm intent.
flag_dictionary_hits <- function(responses, dict) {
  hits <- responses %>%
    tokens() %>%                        # tokenization
    dfm(tolower = TRUE) %>%             # lowercase while building the dfm
    dfm_wordstem() %>%                  # stem the features
    dfm_lookup(dictionary = dict) %>%   # count matches per dictionary key
    convert(to = "data.frame")          # doc_id + one count column per key
  key_cols <- setdiff(names(hits), "doc_id")
  # Collapse counts to presence/absence
  hits[key_cols] <- lapply(hits[key_cols],
                           function(count) as.numeric(count > 0))
  hits
}

## One coded data frame per experimental group
dtm.China.ctrl   <- flag_dictionary_hits(df.China.ctrl$open_ended, myDict)
dtm.China.treat  <- flag_dictionary_hits(df.China.treat$open_ended, myDict)
dtm.CntryX.ctrl  <- flag_dictionary_hits(df.CntryX.ctrl$open_ended, myDict)
dtm.CntryX.treat <- flag_dictionary_hits(df.CntryX.treat$open_ended, myDict)
##sum(dtm.China.ctrl$other.reg)  # this counts how many responses contain other-regarding vocab
##sum(dtm.China.ctrl$self.serve) # this counts how many responses contain self-serving vocab

## Assemble the Figure A11 summary table
# One row per country x treatment x category (8 rows). Lower_bound and
# Upper_bound start as NA and are filled in by the confidence-interval step.
# Total is the number of rows in the group whose content1 is not -99
# (presumably the code for "no usable response" -- TODO confirm); each group's
# total is used for both of its categories.
df.method1 <- data.frame(
  Country = rep(c("China", "Country X"), each = 4),
  Treatment = rep(
    rep(c("No Additional Vignette", "Additional Vignette"), each = 2),
    times = 2
  ),
  Category = rep(c("Other-Regarding", "Self-Serving"), times = 4),
  Frequency = c(
    sum(dtm.China.ctrl$other.reg),   sum(dtm.China.ctrl$self.serve),
    sum(dtm.China.treat$other.reg),  sum(dtm.China.treat$self.serve),
    sum(dtm.CntryX.ctrl$other.reg),  sum(dtm.CntryX.ctrl$self.serve),
    sum(dtm.CntryX.treat$other.reg), sum(dtm.CntryX.treat$self.serve)
  ),
  Percentage = NA,
  Lower_bound = NA,
  Upper_bound = NA,
  Total = rep(
    c(sum(df.China.ctrl$content1 != -99),
      sum(df.China.treat$content1 != -99),
      sum(df.CntryX.ctrl$content1 != -99),
      sum(df.CntryX.treat$content1 != -99)),
    each = 2
  ),
  stringsAsFactors = FALSE
)
# Share of the group's responses that mention the category, in percent
df.method1$Percentage <- df.method1$Frequency / df.method1$Total * 100

# Compute the confidence interval of the percentage in every row
# prop.test() is run per row without continuity correction at its default
# confidence level; the interval bounds are rescaled from proportions to
# percent. Iterating over seq_len(nrow()) keeps the loop in step with the
# table instead of hard-coding its 8 rows.
for (i in seq_len(nrow(df.method1))) {
  ci <- prop.test(x = df.method1$Frequency[i], n = df.method1$Total[i],
                  correct = FALSE)$conf.int
  df.method1$Lower_bound[i] <- ci[1] * 100
  df.method1$Upper_bound[i] <- ci[2] * 100
}

## Test whether the percentages differ significantly between treatment groups
# Each call is a two-sided, two-sample test of equal proportions without
# continuity correction. It compares the "No Additional Vignette" row with the
# "Additional Vignette" row of the same country and category in df.method1.
# The trailing comments record the values reported in Appendix F.

# Percentages of other-regarding responses with China as the trade partner
prop.test(x = c(df.method1$Frequency[1], df.method1$Frequency[3]),
          n = c(df.method1$Total[1], df.method1$Total[3]),
          alternative = "two.sided", conf.level = .95, correct = FALSE)
          # beta = 0.12, p = 0.0156, n = 388 (reported in Appendix F)

# Percentages of self-serving responses with China as the trade partner
prop.test(x = c(df.method1$Frequency[2], df.method1$Frequency[4]),
          n = c(df.method1$Total[2], df.method1$Total[4]),
          alternative = "two.sided", conf.level = .95, correct = FALSE)
          # beta = -0.11, p = 0.0201, n = 388 (reported in Appendix F)

# Percentages of other-regarding responses with Country X as the trade partner
prop.test(x = c(df.method1$Frequency[5], df.method1$Frequency[7]),
          n = c(df.method1$Total[5], df.method1$Total[7]),
          alternative = "two.sided", conf.level = .95, correct = FALSE)
          # beta = 0.13, p = 0.0105, n = 380 (reported in Appendix F)

# Percentages of self-serving responses with Country X as the trade partner
prop.test(x = c(df.method1$Frequency[6], df.method1$Frequency[8]),
          n = c(df.method1$Total[6], df.method1$Total[8]),
          alternative = "two.sided", conf.level = .95, correct = FALSE)
          # beta = -0.08, p = 0.0868, n = 380 (reported in Appendix F)

## Bar chart for China (Figure A11)
# Fix the level order so "No Additional Vignette" is drawn and listed first
df.method1$Treatment <- factor(
  df.method1$Treatment,
  levels = c("No Additional Vignette", "Additional Vignette")
)
# Dodged bars per category and treatment, with the interval bounds as error bars
ggplot(
  data = filter(df.method1, Country == "China"),
  mapping = aes(x = Category, y = Percentage, fill = Treatment)
) +
  geom_col(position = position_dodge(), colour = "black") +
  geom_errorbar(
    aes(ymin = Lower_bound, ymax = Upper_bound),
    width = .2, position = position_dodge(.9)
  ) +
  scale_x_discrete(breaks = c("Other-Regarding", "Self-Serving")) +
  scale_fill_manual(values = c("grey90", "grey50")) +
  labs(x = "", y = "Percentage of Responses Mentioning Such Terms") +
  coord_cartesian(ylim = c(0, 70)) +
  theme_classic() +
  theme(
    text = element_text(family = "Times", size = 14),
    axis.text = element_text(colour = "black", size = 14),
    legend.justification = c(1, 1),
    legend.position = c(0.98, 0.98),
    legend.box.background = element_rect(color = "black"),
    legend.key.size = unit(1.5, "line")
  )
# No plot object is passed, so ggsave() writes the most recent plot
ggsave("dictionary_method1_China.pdf", width = 6, height = 6, dpi = 500)

## Bar chart for Country X (Figure A11)
# Same layout as the China chart; relies on df.method1$Treatment already being
# a factor with "No Additional Vignette" as its first level.
ggplot(
  data = filter(df.method1, Country == "Country X"),
  mapping = aes(x = Category, y = Percentage, fill = Treatment)
) +
  geom_col(position = position_dodge(), colour = "black") +
  geom_errorbar(
    aes(ymin = Lower_bound, ymax = Upper_bound),
    width = .2, position = position_dodge(.9)
  ) +
  scale_x_discrete(breaks = c("Other-Regarding", "Self-Serving")) +
  scale_fill_manual(values = c("grey90", "grey50")) +
  labs(x = "", y = "Percentage of Responses Mentioning Such Terms") +
  coord_cartesian(ylim = c(0, 70)) +
  theme_classic() +
  theme(
    text = element_text(family = "Times", size = 14),
    axis.text = element_text(colour = "black", size = 14),
    legend.justification = c(1, 1),
    legend.position = c(0.98, 0.98),
    legend.box.background = element_rect(color = "black"),
    legend.key.size = unit(1.5, "line")
  )
# No plot object is passed, so ggsave() writes the most recent plot
ggsave("dictionary_method1_CntryX.pdf", width = 6, height = 6, dpi = 500)

### Figure A12: Percentages of "Harm/Care" and "Fairness/Reciprocity" Responses in the Win-Lose Scenario ###
## Word lists for the "care" and "fairness" categories
# See http://www.moralfoundations.org/ (Graham et al. 2009)
MFD.care <- c(
  "safe*", "peace*", "compassion*", "empath*", "sympath*", "care", "caring",
  "protect*", "shield", "shelter", "amity", "secur*", "benefit*", "defen*",
  "guard*", "preserve", "harm*", "suffer*", "war", "wars", "warl*", "warring",
  "fight*", "violen*", "hurt*", "kill", "kills", "killer*", "killed",
  "killing", "endanger*", "cruel*", "brutal*", "abuse*", "damag*", "ruin*",
  "ravage", "detriment*", "crush*", "attack*", "annihilate*", "destroy",
  "stomp", "abandon*", "spurn", "impair", "exploit", "exploits", "exploited",
  "exploiting", "wound*"
)
MFD.fair <- c(
  "fair", "fairly", "fairness", "fair-*", "fairmind*", "fairplay", "equal*",
  "justice", "justness", "justifi*", "reciproc*", "impartial*", "egalitar*",
  "rights", "equity", "evenness", "equivalent", "unbias*", "tolerant",
  "equable", "balance*", "homologous", "unprejudice*", "reasonable",
  "constant", "honest*", "unfair*", "unequal*", "bias*", "unjust*", "injust*",
  "bigot*", "discriminat*", "disproportion*", "inequitable", "prejud*",
  "dishonest", "unscrupulous", "dissociate", "preference", "favoritism",
  "segregat*", "exclusion", "exclud*"
)
## Two-key dictionary ("care", "fair") built from the word lists above
myDict2 <- list(care = MFD.care, fair = MFD.fair) %>%
  dictionary()

## Dictionary coding of the open-ended responses (care / fairness)
# flag_dictionary_hits(): code every response as 1/0 for each dictionary key.
#   responses  character vector, one open-ended answer per element
#   dict       quanteda dictionary; its keys become the output columns
# Returns a data frame with one row per response: `doc_id` plus one numeric
# column per key that is 1 when the response contains at least one matching
# term and 0 otherwise (simple presence coding, not a term count).
#
# The text is tokenized, lowercased and stemmed before the lookup. This is the
# same normalization as dfm(text, tolower = T, stem = T), but written with
# tokens() and dfm_wordstem() because calling dfm() on raw text and passing
# `stem` are deprecated in quanteda 3.x.
#
# NOTE(review): the lookup runs on stemmed features, so entries that are longer
# than their own stem (e.g. "justice" vs. the stem "justic", "abuse*" vs.
# "abus") appear unable to match. Kept as-is to reproduce the published
# figures -- confirm intent.
flag_dictionary_hits <- function(responses, dict) {
  hits <- responses %>%
    tokens() %>%                        # tokenization
    dfm(tolower = TRUE) %>%             # lowercase while building the dfm
    dfm_wordstem() %>%                  # stem the features
    dfm_lookup(dictionary = dict) %>%   # count matches per dictionary key
    convert(to = "data.frame")          # doc_id + one count column per key
  key_cols <- setdiff(names(hits), "doc_id")
  # Collapse counts to presence/absence
  hits[key_cols] <- lapply(hits[key_cols],
                           function(count) as.numeric(count > 0))
  hits
}

## One coded data frame per experimental group
dtm.China.ctrl2   <- flag_dictionary_hits(df.China.ctrl$open_ended, myDict2)
dtm.China.treat2  <- flag_dictionary_hits(df.China.treat$open_ended, myDict2)
dtm.CntryX.ctrl2  <- flag_dictionary_hits(df.CntryX.ctrl$open_ended, myDict2)
dtm.CntryX.treat2 <- flag_dictionary_hits(df.CntryX.treat$open_ended, myDict2)
##sum(dtm.China.ctrl2$care) # this counts how many responses contain care vocab
##sum(dtm.China.ctrl2$fair) # this counts how many responses contain fairness vocab

## Assemble the Figure A12 summary table
# One row per country x treatment x category (8 rows). Lower_bound and
# Upper_bound start as NA and are filled in by the confidence-interval step.
# Total is the number of rows in the group whose content1 is not -99
# (presumably the code for "no usable response" -- TODO confirm); each group's
# total is used for both of its categories.
df.method2 <- data.frame(
  Country = rep(c("China", "Country X"), each = 4),
  Treatment = rep(
    rep(c("No Additional Vignette", "Additional Vignette"), each = 2),
    times = 2
  ),
  Category = rep(c("Care", "Fairness"), times = 4),
  Frequency = c(
    sum(dtm.China.ctrl2$care),   sum(dtm.China.ctrl2$fair),
    sum(dtm.China.treat2$care),  sum(dtm.China.treat2$fair),
    sum(dtm.CntryX.ctrl2$care),  sum(dtm.CntryX.ctrl2$fair),
    sum(dtm.CntryX.treat2$care), sum(dtm.CntryX.treat2$fair)
  ),
  Percentage = NA,
  Lower_bound = NA,
  Upper_bound = NA,
  Total = rep(
    c(sum(df.China.ctrl$content1 != -99),
      sum(df.China.treat$content1 != -99),
      sum(df.CntryX.ctrl$content1 != -99),
      sum(df.CntryX.treat$content1 != -99)),
    each = 2
  ),
  stringsAsFactors = FALSE
)
# Share of the group's responses that mention the category, in percent
df.method2$Percentage <- df.method2$Frequency / df.method2$Total * 100

# Compute the confidence interval of the percentage in every row
# prop.test() is run per row without continuity correction at its default
# confidence level; the interval bounds are rescaled from proportions to
# percent. Iterating over seq_len(nrow()) keeps the loop in step with the
# table instead of hard-coding its 8 rows.
for (i in seq_len(nrow(df.method2))) {
  ci <- prop.test(x = df.method2$Frequency[i], n = df.method2$Total[i],
                  correct = FALSE)$conf.int
  df.method2$Lower_bound[i] <- ci[1] * 100
  df.method2$Upper_bound[i] <- ci[2] * 100
}

## Test whether the percentages differ significantly between treatment groups
# Each call is a two-sided, two-sample test of equal proportions without
# continuity correction. It compares the "No Additional Vignette" row with the
# "Additional Vignette" row of the same country for the "Care" category.
# The trailing comments record the values reported in Appendix F.

# Percentages of "care" responses with China as the trade partner
prop.test(x = c(df.method2$Frequency[1], df.method2$Frequency[3]),
          n = c(df.method2$Total[1], df.method2$Total[3]),
          alternative = "two.sided", conf.level = .95, correct = FALSE)
          # beta = 0.16, p = 0.0011, n = 388 (reported in Appendix F)

# Percentages of "care" responses with Country X as the trade partner
prop.test(x = c(df.method2$Frequency[5], df.method2$Frequency[7]),
          n = c(df.method2$Total[5], df.method2$Total[7]),
          alternative = "two.sided", conf.level = .95, correct = FALSE)
          # beta = 0.13, p = 0.0111, n = 380 (reported in Appendix F)

## Bar chart for China (Figure A12)
# Fix the level order so "No Additional Vignette" is drawn and listed first
df.method2$Treatment <- factor(
  df.method2$Treatment,
  levels = c("No Additional Vignette", "Additional Vignette")
)
# Dodged bars per category and treatment, with the interval bounds as error bars
ggplot(
  data = filter(df.method2, Country == "China"),
  mapping = aes(x = Category, y = Percentage, fill = Treatment)
) +
  geom_col(position = position_dodge(), colour = "black") +
  geom_errorbar(
    aes(ymin = Lower_bound, ymax = Upper_bound),
    width = .2, position = position_dodge(.9)
  ) +
  scale_x_discrete(breaks = c("Care", "Fairness")) +
  scale_fill_manual(values = c("grey90", "grey50")) +
  labs(x = "", y = "Percentage of Responses Mentioning Such Terms") +
  coord_cartesian(ylim = c(0, 60)) +
  theme_classic() +
  theme(
    text = element_text(family = "Times", size = 14),
    axis.text = element_text(colour = "black", size = 14),
    legend.justification = c(1, 1),
    legend.position = c(0.98, 0.98),
    legend.box.background = element_rect(color = "black"),
    legend.key.size = unit(1.5, "line")
  )
# No plot object is passed, so ggsave() writes the most recent plot
ggsave("dictionary_method2_China.pdf", width = 6, height = 6, dpi = 500)

## Bar chart for Country X (Figure A12)
# Same layout as the China chart; relies on df.method2$Treatment already being
# a factor with "No Additional Vignette" as its first level.
ggplot(
  data = filter(df.method2, Country == "Country X"),
  mapping = aes(x = Category, y = Percentage, fill = Treatment)
) +
  geom_col(position = position_dodge(), colour = "black") +
  geom_errorbar(
    aes(ymin = Lower_bound, ymax = Upper_bound),
    width = .2, position = position_dodge(.9)
  ) +
  scale_x_discrete(breaks = c("Care", "Fairness")) +
  scale_fill_manual(values = c("grey90", "grey50")) +
  labs(x = "", y = "Percentage of Responses Mentioning Such Terms") +
  coord_cartesian(ylim = c(0, 60)) +
  theme_classic() +
  theme(
    text = element_text(family = "Times", size = 14),
    axis.text = element_text(colour = "black", size = 14),
    legend.justification = c(1, 1),
    legend.position = c(0.98, 0.98),
    legend.box.background = element_rect(color = "black"),
    legend.key.size = unit(1.5, "line")
  )
# No plot object is passed, so ggsave() writes the most recent plot
ggsave("dictionary_method2_CntryX.pdf", width = 6, height = 6, dpi = 500)
